library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.4 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::arrange() masks plyr::arrange()
## x purrr::compact() masks plyr::compact()
## x dplyr::count() masks plyr::count()
## x dplyr::failwith() masks plyr::failwith()
## x dplyr::filter() masks stats::filter()
## x dplyr::id() masks plyr::id()
## x dplyr::lag() masks stats::lag()
## x dplyr::mutate() masks plyr::mutate()
## x dplyr::rename() masks plyr::rename()
## x dplyr::summarise() masks plyr::summarise()
## x dplyr::summarize() masks plyr::summarize()
library(stringr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following objects are masked from 'package:plyr':
##
## arrange, mutate, rename, summarise
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# import dataset
# tran <- read.csv(file = './data/transactions.csv')
# Load the per-account summary table.
accounts <- read.csv(file = "./data/accounts_analytical.csv")

# Accounts with a missing card count are plotted as holding zero cards.
accounts$credit_cards[is.na(accounts$credit_cards)] <- 0

# Label accounts that have no loan status as "None".
# read.csv() turns blank *numeric* fields into NA but keeps blank *text*
# fields as "", so test for both. Coerce to character first: on R < 4.0 the
# column is read as a factor, which would reject the new "None" level.
accounts$loan_status <- as.character(accounts$loan_status)
accounts$loan_status[
  is.na(accounts$loan_status) | !nzchar(accounts$loan_status)
] <- "None"

# Frequency table of card counts (not used by the plot below).
credit_cards <- table(accounts$credit_cards)

# "avg_balance" is the midpoint between the max and min balance.
accounts$avg_balance <- (accounts$max_balance + accounts$min_balance) / 2

# Scatter of balance midpoint vs. number of cards, coloured by loan status.
# `mode` is set explicitly so plotly does not have to guess it (and emit a
# "No scatter mode" message); columns are referenced through formulas so they
# are resolved inside `data`.
fig <- plot_ly(
  data = accounts,
  x = ~avg_balance,
  y = ~credit_cards,
  type = "scatter",
  mode = "markers",
  alpha = 1,
  color = ~loan_status,
  text = ~paste("Loan: ", loan_status)
)

# Card counts are whole numbers, so force integer-spaced y ticks from 0.
fig <- fig %>%
  layout(
    title = "Characteristic of accounts",
    yaxis = list(dtick = 1, tick0 = 0, tickmode = "linear")
  )

fig
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
# Python for this task
# Load the per-account summary table.
acc = pd.read_csv("./data/accounts_analytical.csv")

# Fill missing values: no card count -> 0 cards, no loan status -> "none".
# The result is assigned back rather than calling fillna(inplace=True) on
# `acc.credit_cards` / `acc.loan_status`: that form is chained assignment,
# which warns on recent pandas and leaves `acc` unchanged under Copy-on-Write.
acc = acc.fillna({"credit_cards": 0, "loan_status": "none"})

# "avg_balance" is the midpoint between the max and min balance.
acc["avg_balance"] = (acc["max_balance"] + acc["min_balance"]) / 2

# Split the accounts by loan status; one scatter series is drawn per status.
statuses = ("none", "current", "expired")
cr0, cr1, cr2 = (acc[acc["loan_status"] == status] for status in statuses)

for subset, status in zip((cr0, cr1, cr2), statuses):
    plt.scatter(subset["avg_balance"], subset["credit_cards"], label=status)

plt.legend()
# Fixed integer ticks on the card-count axis.
plt.yticks([0, 1, 2])
## ([<matplotlib.axis.YTick object at 0x7ffe4eae1e10>, <matplotlib.axis.YTick object at 0x7ffe4eae19e8>, <matplotlib.axis.YTick object at 0x7ffe56527e80>], [Text(0, 0, ''), Text(0, 0, ''), Text(0, 0, '')])
plt.xlabel("avg_balance")
plt.ylabel("Number of credit cards")
plt.title("Characteristic of accounts")
plt.show()
```